/**********************************************************************/
/*
   Author: Karan Makkar
   Created: Dec 2024
   Updated: Aug 2025, Youssef Assarssah
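   Description: Merges de-identified PMO data transfers. Items are numbered by
   data transfer (matching the JPAL_DATA_<n> folders), so the numbering has gaps: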
	 	1. Batch 1 - 16
		2. Batch 1 - 16 randomization weights
		3. Batch 17 and weights
		6. Batch 18 - 22 and weights
		8. Additional variables for batch 1 - 22, including previously dropped observations
		9. Baseline characteristics for newly added observations in batch 1-22
		10. Updated location variables in batch 1-22
		12. Updated treatment status up to batch 47

   Outputs data at person-batch level
   Note: to set filepaths, run MASTER.do.

   Outputs:
   pmo_b1-22_raw_long.dta

*/
/**********************************************************************/

	* File paths: load them here unless the global master_run is already "1"
	* (presumably set by MASTER.do, which the header says sets file paths).
	if "$master_run" != "1" include "./Do/SET_FILEPATHS.do"

	* Logging: close any log left open, then start a text log whose file name
	* is prefixed with today's date formatted as YYYYMMDD.
	capture log close
	local today = date(c(current_date), "DMY")
	local prefix : display %tdCYND `today'
	log using "$KP_logs/`prefix'_PMO_merge_long_b1-22", replace text

/*----------------------------------------------------*/
            /* Section: Append batches */
/*----------------------------------------------------*/

* Batch 1 seeds the appended dataset; it must be unique on anon_id4
	use "$KP_deid_admin/Raw/JPAL_DATA_1/cleanedbatch_1.dta", clear
	gisid anon_id4

* Randomization weight: every record must match and carry a non-missing weight.
* Names are then aligned with the ones used for the later batches.
	merge 1:1 anon_id4 using "$KP_deid_admin/Raw/JPAL_DATA_2/batch_1.dta", nogenerate assert(match)
	assert !missing(bobot)
	rename (bobot test_score) (ticket_score final_test_score)

* Updated domicile variables: records found only in the domisili file are dropped
	merge 1:1 anon_id4 using "$KP_deid_admin/Raw/JPAL_DATA_10/domisili_1.dta", nogenerate keep(master match) keepusing(anon_prov_id java urban)

* Additional variables: the existing pass flag is kept under an _old suffix.
* _merge from this merge is deliberately left in memory; it feeds the flags below.
	rename has_passed_current_batch has_passed_current_batch_old
	merge 1:1 anon_id4 using "$KP_deid_admin/Raw/JPAL_DATA_8/additional variable_1.dta"

* Baseline variables: merged with -update-; records found only in the baseline
* file are dropped, and no new _merge is created.
	merge 1:1 anon_id4 using "$KP_deid_admin/Raw/JPAL_DATA_9/final add baseline_1.dta", update nogenerate keep(master match match_update match_conflict)

	generate batch = 1

* Flags describing the outcome of the additional-variables merge:
*   old_ob_only   = 1 if the record was only in the data before that merge
*   new_ob_only   = 1 if the record was only in the additional-variables file
*   match_add_var = 1 if the record was in both
	generate old_ob_only   = (_merge == 1)
	generate new_ob_only   = (_merge == 2)
	generate match_add_var = (_merge == 3)
	drop _merge

* Build batches 2-22 one at a time and append each onto the running dataset.
* Differences between source files:
*   - Batch 18-22 does not have code_domisili; it has province_code/name and
*     city_code/name instead
*   - Batch 18-22 has final_test_score instead of test_score
*   - Batch 18-22 has ticket_score instead of bobot
	quietly forvalues b = 2/22 {
		noisily display "Appending Batch `b'"

		* Set the running dataset aside while this batch is assembled
		preserve

		if `b' <= 16 {
			* Batches up to 16: weights come from a separate file and must match fully
			use "$KP_deid_admin/Raw/JPAL_DATA_1/cleanedbatch_`b'.dta", clear
			merge 1:1 anon_id4 using "$KP_deid_admin/Raw/JPAL_DATA_2/batch_`b'.dta", nogenerate assert(match)
			assert !missing(bobot)
			rename (bobot test_score) (ticket_score final_test_score)
		}
		else if `b' == 17 {
			* Batch 17: single file; final_score is renamed to ticket_score
			use "$KP_deid_admin/Raw/JPAL_DATA_3/cleanedbatch_17.dta", clear
			rename (final_score test_score) (ticket_score final_test_score)
		}
		else {
			* Batches 18-22: single file, no renaming applied
			use "$KP_deid_admin/Raw/JPAL_DATA_6/finalcleanedbatch_`b'.dta", clear
		}

		gisid anon_id4

		* Updated domicile variables: records found only in the domisili file are dropped
		merge 1:1 anon_id4 using "$KP_deid_admin/Raw/JPAL_DATA_10/domisili_`b'.dta", nogenerate keep(master match) keepusing(anon_prov_id java urban)

		* Additional variables: keep the existing pass flag under an _old suffix;
		* _merge from this merge is left in memory for the flags below
		rename has_passed_current_batch has_passed_current_batch_old
		merge 1:1 anon_id4 using "$KP_deid_admin/Raw/JPAL_DATA_8/additional variable_`b'.dta"

		* Baseline variables: merged with -update-, no new _merge created
		merge 1:1 anon_id4 using "$KP_deid_admin/Raw/JPAL_DATA_9/final add baseline_`b'.dta", update nogenerate keep(master match match_update match_conflict)

		generate batch = `b'

		* Flags describing the outcome of the additional-variables merge
		generate old_ob_only   = (_merge == 1)
		generate new_ob_only   = (_merge == 2)
		generate match_add_var = (_merge == 3)
		drop _merge

		* Park the finished batch in a temporary file, bring the running dataset
		* back, and add the batch to the end of it
		tempfile batch_file
		save `batch_file'
		restore
		append using `batch_file'
	}

* Data transfer 12: updated treatment status for 2022 control group survey
* respondents. The batch_sk already in memory is renamed to batch_sk_39 first,
* so the batch_sk brought in by the transfer file can be stored as batch_sk_47.
* The merge fails if the transfer file contains an anon_id4 not already in memory.
	rename batch_sk batch_sk_39
	merge m:1 anon_id4 using "$KP_deid_admin/Raw/JPAL_DATA_12/[J-PAL] CGS Update Treatment Status Batch 47.dta", assert(master match) generate(merge_t12)
	rename batch_sk batch_sk_47

* The data must be unique at the person-batch level; sort rows in that order
	gisid anon_id4 batch
	gsort +anon_id4 +batch

* Save the raw merged data, but only when it reproduces the expected data
* signature. The signature is copied into a local so it can be both compared
* and reported if it does not match.
	datasignature
	local signature "`r(datasignature)'"

	if "`signature'" == "58864849:50(90903):310014626:1706635967" {
		save "$KP_deid_admin/Raw/Merged/pmo_b1-22_raw_long.dta", replace
	}
	else {
		di as err "Careful, your machine produces a different dataset"
		di as err "Computed signature: `signature'"
		* Abort with a real return code (459: something that should be true of
		* your data is not) instead of relying on an unrecognized command to
		* halt the do-file. Nothing is saved in this branch.
		error 459
	}

	* Reached only when the data were saved; close the log opened at the top
	cap log close


// DONE
